Up to this vignette, we only used gene features to try to identify and analyze methylation. In this one, we propose to use methylation features instead, that is, CGI-related features, to extract the methylation matrix.
In the first part, we present a per-gene visualisation tool; in the second, a database-wide visualisation through the heatmap pipeline, as in the previous vignettes.
Notably, we use a table named cgi_pf, of dimension 27k * 5, processed by hand from the TCGA database, which gives the coordinates, width and center of each CpG island (CGI) indexed in the EPIC database. We also use a table named cgi_coordinates, which gives the coordinates of the closest CGI for each probe, extracted and processed from the TCGA database.
# Select the "super-up" deregulated gene set and build its features with
# up_str = dwn_str = 7500 around each gene (units as defined by get_features).
targeted_genes <- penda_superup_deregulated
features <- get_features(
  targeted_genes,
  study = trscr_lusc,
  up_str = 7500,
  dwn_str = 7500
)

# Load the CGI-indexed probes unless an object of that name already exists,
# using an on-disk RDS file as a cache between sessions.
if (!exists("cgi_indexed_probes")) {
  # Single definition of the cache location (previously repeated three times).
  cgi_probes_cache <- "~/projects/supergenes/data/RDS/cgi_indexed_probes"
  if (file.exists(cgi_probes_cache)) {
    cgi_indexed_probes <- readRDS(cgi_probes_cache)
  } else {
    # Cache miss: compute once, then persist for later runs. The freshly
    # computed object is used directly instead of being re-read from disk.
    cgi_indexed_probes <- get_cgis_probes()
    # Make sure the target directory exists so the result of the computation
    # is not lost to a failing saveRDS().
    dir.create(dirname(cgi_probes_cache), recursive = TRUE, showWarnings = FALSE)
    saveRDS(cgi_indexed_probes, cgi_probes_cache)
  }
}

# NOTE(review): called without arguments, so it presumably reads `features`
# and/or `cgi_indexed_probes` from the calling environment -- confirm.
feat_closest_cgi <- get_genes_closest_cgis()
First, we define a function, plot_gene_cgi, which shows the CGIs and probes of a gene within a given window (fixed here at [100k,100k]).
# Per-gene CGI/probe plots for three example genes.
# Kept as separate top-level calls (rather than a loop) so that whatever each
# call returns or draws is emitted exactly as at top level.
plot_gene_cgi("FKBP4")
plot_gene_cgi("OTX1")
plot_gene_cgi("CDT1")
Note that probes are sorted by coordinates. Thus, the heatmap is readable from left to right.
The following plot is a visualisation of the CGIs around the TSS. For each plot, a parametrizable window surrounding the TSS is split into a parametrizable number of bins, and each bin gets a 1 if a CGI overlaps it and 0 otherwise.
# CGI presence around the TSS for the current gene set, window c(5000, 5000).
get_cgi_mat(window = c(5000, 5000))

# Probe categories relative to the closest CGI; used both to build the
# probe map and to bin the per-patient values below.
binlist <- c("N_Shore", "opensea", "S_Shore", "Island", "S_Shelf", "N_Shelf")
closest_cgi_map <- reduce_map(feat_closest_cgi, binlist)

# NOTE(review): the "sd" heatmaps below are titled "mean of sd" but pass
# `fun = sd` to subset_vals_per_bins() -- confirm which one is intended.

# ---- Super-up genes / healthy tissues ----
data_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = data_reduced_healthy,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(
  means_per_cgi_healthy,
  main = "mean of means superup/healthy tissues"
)

sd_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = sd_reduced_healthy,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(
  sd_per_cgi_healthy,
  main = "mean of sd superup/healthy tissues"
)

# ---- Super-up genes / tumoral tissues ----
means_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = means_reduced_tumoral,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(
  means_per_cgi_tumoral,
  main = "mean of means superup/tumoral tissues"
)

sd_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = sd_reduced_tumoral,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(
  sd_per_cgi_tumoral,
  main = "mean of sd superup/tumoral tissues"
)

# ---- Super-up genes / differential values ----
means_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = means_reduced_differential,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(
  means_per_cgi_differential,
  main = "mean of means superup/differential values"
)

sd_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = sd_reduced_differential,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(
  sd_per_cgi_differential,
  main = "mean of sd superup/differential values"
)
# Switch the working gene set to the "super-down" deregulated genes and
# rebuild the features and closest-CGI index for it.
targeted_genes <- penda_superdown_deregulated
features <- get_features(
  targeted_genes,
  study = trscr_lusc,
  up_str = 7500,
  dwn_str = 7500
)
feat_closest_cgi_down <- get_genes_closest_cgis()

# CGI presence around the TSS for the current gene set, window c(5000, 5000).
get_cgi_mat(window = c(5000, 5000))

# `binlist` is the bin vector defined in the super-up section; it holds the
# same six categories that were previously spelled out again here.
closest_cgi_map <- reduce_map(feat_closest_cgi_down, binlist)

# The result variables below reuse the names from the super-up section and
# therefore overwrite those earlier values.
# NOTE(review): the "sd" heatmaps below are titled "mean of sd" but pass
# `fun = sd` to subset_vals_per_bins() -- confirm which one is intended.

# ---- Super-down genes / healthy tissues ----
data_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = data_reduced_healthy,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(
  means_per_cgi_healthy,
  main = "mean of means superdown/healthy tissues"
)

sd_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = sd_reduced_healthy,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(
  sd_per_cgi_healthy,
  main = "mean of sd superdown/healthy tissues"
)

# ---- Super-down genes / tumoral tissues ----
means_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = means_reduced_tumoral,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(
  means_per_cgi_tumoral,
  main = "mean of means superdown/tumoral tissues"
)

sd_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = sd_reduced_tumoral,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(
  sd_per_cgi_tumoral,
  main = "mean of sd superdown/tumoral tissues"
)

# ---- Super-down genes / differential values ----
means_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = means_reduced_differential,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(
  means_per_cgi_differential,
  main = "mean of means superdown/differential values"
)

sd_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = sd_reduced_differential,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(
  sd_per_cgi_differential,
  main = "mean of sd superdown/differential values"
)
# Switch the working gene set to the "super-conserved" genes and rebuild the
# features and closest-CGI index for it.
targeted_genes <- penda_superconserved
features <- get_features(
  targeted_genes,
  study = trscr_lusc,
  up_str = 7500,
  dwn_str = 7500
)
feat_closest_cgi_cons <- get_genes_closest_cgis()

# `binlist` is the bin vector defined in the super-up section; it holds the
# same six categories that were previously spelled out again here.
closest_cgi_map <- reduce_map(feat_closest_cgi_cons, binlist)

# CGI presence around the TSS for the current gene set, window c(5000, 5000).
get_cgi_mat(window = c(5000, 5000))

# The result variables below reuse the names from the previous sections and
# therefore overwrite those earlier values.
# NOTE(review): the "sd" heatmaps below are titled "mean of sd" but pass
# `fun = sd` to subset_vals_per_bins() -- confirm which one is intended.

# ---- Super-conserved genes / healthy tissues ----
data_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = data_reduced_healthy,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(
  means_per_cgi_healthy,
  main = "mean of means supercons/healthy tissues"
)

sd_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = sd_reduced_healthy,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(
  sd_per_cgi_healthy,
  main = "mean of sd supercons/healthy tissues"
)

# ---- Super-conserved genes / tumoral tissues ----
means_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = means_reduced_tumoral,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(
  means_per_cgi_tumoral,
  main = "mean of means supercons/tumoral tissues"
)

sd_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = sd_reduced_tumoral,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(
  sd_per_cgi_tumoral,
  main = "mean of sd supercons/tumoral tissues"
)

# ---- Super-conserved genes / differential values ----
means_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = means_reduced_differential,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(
  means_per_cgi_differential,
  main = "mean of means supercons/differential values"
)

sd_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = sd_reduced_differential,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(
  sd_per_cgi_differential,
  main = "mean of sd supercons/differential values"
)